# Copyright (c) Facebook, Inc. and its affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

cmake_minimum_required(VERSION 3.5 FATAL_ERROR)

# ---[ Project
# C and C++ cover the operator sources, ASM covers the hand-written .S
# micro-kernels listed further down.
project(PYTORCH_QNNPACK C CXX ASM)

# GNUInstallDirs must be included after project(): the module chooses the
# default CMAKE_INSTALL_LIBDIR from the target architecture, which is only
# known once at least one language has been enabled. The resulting
# CMAKE_INSTALL_LIBDIR / CMAKE_INSTALL_INCLUDEDIR are used by install() below.
include(GNUInstallDirs)

# ---[ User-configurable options
# Link against the pthreadpool interface only; the embedding project supplies
# the thread pool implementation.
option(PYTORCH_QNNPACK_CUSTOM_THREADPOOL
  "Build QNNPACK for custom thread pool"
  OFF)

# Library flavour: "default" defers to CMake's own choice in add_library().
set(PYTORCH_QNNPACK_LIBRARY_TYPE "default"
  CACHE STRING "Type of library (shared, static, or default) to build")
set_property(CACHE PYTORCH_QNNPACK_LIBRARY_TYPE
  PROPERTY STRINGS default static shared)

# Both the unit tests and the benchmarks are built unless switched off.
option(PYTORCH_QNNPACK_BUILD_TESTS
  "Build QNNPACK unit tests"
  ON)
option(PYTORCH_QNNPACK_BUILD_BENCHMARKS
  "Build QNNPACK benchmarks"
  ON)

# ---[ Global compile definitions
# Enable runtime requantization for everything configured from this directory.
add_definitions(-DPYTORCH_QNNPACK_RUNTIME_QUANTIZATION=1)

# ---[ CTest
# Test registration is only turned on when the unit tests are requested.
if(PYTORCH_QNNPACK_BUILD_TESTS)
  enable_testing()
endif()

# ---[ Target platform validation
# Processor check. When CMAKE_SYSTEM_PROCESSOR is set it must be one of the
# supported x86 / ARM names. When it is not set, only iOS builds are accepted,
# and those are validated through IOS_ARCH instead.
if(CMAKE_SYSTEM_PROCESSOR)
  if(NOT CMAKE_SYSTEM_PROCESSOR MATCHES "^(i[3-6]86|x86_64|armv[5-8].*|aarch64)$")
    message(FATAL_ERROR "Unrecognized CMAKE_SYSTEM_PROCESSOR = ${CMAKE_SYSTEM_PROCESSOR}")
  endif()
elseif(NOT IOS)
  message(FATAL_ERROR "CMAKE_SYSTEM_PROCESSOR is not defined")
else()
  # Exactly one iOS architecture may be configured per build tree.
  list(LENGTH IOS_ARCH IOS_ARCH_COUNT)
  if(IOS_ARCH_COUNT GREATER 1)
    message(FATAL_ERROR "Unsupported QNNPACK build with multiple iOS architectures (${IOS_ARCH}). "
      "Specify a single architecture in IOS_ARCH and re-configure. ")
  endif()
  if(NOT IOS_ARCH MATCHES "^(i386|x86_64|armv7.*|arm64.*)$")
    message(FATAL_ERROR "Unrecognized IOS_ARCH = ${IOS_ARCH}")
  endif()
endif()

# Operating system check: only Darwin, Linux and Android are accepted.
if(CMAKE_SYSTEM_NAME)
  if(NOT CMAKE_SYSTEM_NAME MATCHES "^(Darwin|Linux|Android)$")
    message(FATAL_ERROR "Unrecognized CMAKE_SYSTEM_NAME = ${CMAKE_SYSTEM_NAME}")
  endif()
else()
  message(FATAL_ERROR "CMAKE_SYSTEM_NAME not defined")
endif()

# ---[ Download deps
# Root directories for the downloaded dependency sources and their build trees.
# Both are cache entries, so an embedding project or the command line can
# override them.
set(CONFU_DEPENDENCIES_SOURCE_DIR "${CMAKE_SOURCE_DIR}/deps"
  CACHE PATH "Confu-style dependencies source directory")
set(CONFU_DEPENDENCIES_BINARY_DIR "${CMAKE_BINARY_DIR}/deps"
  CACHE PATH "Confu-style dependencies binary directory")

# clog is not downloaded; it defaults to the copy under this project's deps/.
if(NOT DEFINED CLOG_SOURCE_DIR)
  set(CLOG_SOURCE_DIR "${PROJECT_SOURCE_DIR}/deps/clog")
endif()

# pytorch_qnnpack_download_dependency(<label> <dir> <template> <var>)
#
# Fetches one dependency at configure time.
#   <label>     human-readable name used in messages and in the cache docstring
#   <dir>       sub-directory name under the CONFU_DEPENDENCIES_* roots
#   <template>  download project template passed to configure_file()
#   <var>       cache variable that receives the dependency source directory
#
# The template is configured into
# ${CONFU_DEPENDENCIES_BINARY_DIR}/<dir>-download/CMakeLists.txt, and that
# helper project is then configured and built with the current generator.
# <var> is cached only after both steps succeed. A failed download therefore
# stops the configure with an error and is retried on the next run, rather
# than leaving a cached path that points at sources which were never fetched.
function(pytorch_qnnpack_download_dependency dep_label dep_dir dep_template dep_var)
  set(dep_download_dir "${CONFU_DEPENDENCIES_BINARY_DIR}/${dep_dir}-download")
  message(STATUS "Downloading ${dep_label} to ${CONFU_DEPENDENCIES_SOURCE_DIR}/${dep_dir} (define ${dep_var} to avoid it)")
  configure_file("${dep_template}" "${dep_download_dir}/CMakeLists.txt")

  execute_process(COMMAND "${CMAKE_COMMAND}" -G "${CMAKE_GENERATOR}" .
    RESULT_VARIABLE dep_result
    WORKING_DIRECTORY "${dep_download_dir}")
  if(dep_result)
    message(FATAL_ERROR "Configuring the ${dep_label} download project failed: ${dep_result}")
  endif()

  execute_process(COMMAND "${CMAKE_COMMAND}" --build .
    RESULT_VARIABLE dep_result
    WORKING_DIRECTORY "${dep_download_dir}")
  if(dep_result)
    message(FATAL_ERROR "Downloading ${dep_label} failed: ${dep_result}")
  endif()

  set(${dep_var} "${CONFU_DEPENDENCIES_SOURCE_DIR}/${dep_dir}" CACHE STRING "${dep_label} source directory")
endfunction()

# A dependency is downloaded only when its *_SOURCE_DIR is not already defined
# and, where a USE_SYSTEM_* switch exists, the system copy was not requested.
if(NOT USE_SYSTEM_CPUINFO AND NOT DEFINED CPUINFO_SOURCE_DIR)
  pytorch_qnnpack_download_dependency(
    "cpuinfo" cpuinfo cmake/DownloadCpuinfo.cmake CPUINFO_SOURCE_DIR)
endif()

if(NOT DEFINED FP16_SOURCE_DIR AND NOT USE_SYSTEM_FP16)
  pytorch_qnnpack_download_dependency(
    "FP16" fp16 cmake/DownloadFP16.cmake FP16_SOURCE_DIR)
endif()

if(NOT DEFINED FXDIV_SOURCE_DIR AND NOT USE_SYSTEM_FXDIV)
  pytorch_qnnpack_download_dependency(
    "FXdiv" fxdiv cmake/DownloadFXdiv.cmake FXDIV_SOURCE_DIR)
endif()

if(NOT DEFINED PSIMD_SOURCE_DIR AND NOT USE_SYSTEM_PSIMD)
  pytorch_qnnpack_download_dependency(
    "PSimd" psimd cmake/DownloadPSimd.cmake PSIMD_SOURCE_DIR)
endif()

if(NOT DEFINED PTHREADPOOL_SOURCE_DIR AND NOT USE_SYSTEM_PTHREADPOOL)
  pytorch_qnnpack_download_dependency(
    "pthreadpool" pthreadpool cmake/DownloadPThreadPool.cmake PTHREADPOOL_SOURCE_DIR)
endif()

# Test and benchmark frameworks are fetched only when those parts are built.
if(PYTORCH_QNNPACK_BUILD_TESTS AND NOT DEFINED GOOGLETEST_SOURCE_DIR)
  pytorch_qnnpack_download_dependency(
    "Google Test" googletest cmake/DownloadGoogleTest.cmake GOOGLETEST_SOURCE_DIR)
endif()

if(PYTORCH_QNNPACK_BUILD_BENCHMARKS AND NOT DEFINED GOOGLEBENCHMARK_SOURCE_DIR)
  pytorch_qnnpack_download_dependency(
    "Google Benchmark" googlebenchmark cmake/DownloadGoogleBenchmark.cmake GOOGLEBENCHMARK_SOURCE_DIR)
endif()

# ---[ QNNPACK library sources
# Library initialization, operator setup/teardown and weight pre-packing.
# These files are compiled with -Os further down.
set(PYTORCH_QNNPACK_INIT_SRCS
  src/init.c
  src/add.c
  src/average-pooling.c
  src/channel-shuffle.c
  src/clamp.c
  src/conv-prepack.cc
  src/convolution.c
  src/deconvolution.c
  src/fc-prepack.cc
  src/fully-connected.c
  src/global-average-pooling.c
  src/hardsigmoid.c
  src/hardswish.c
  src/leaky-relu.c
  src/max-pooling.c
  src/sigmoid.c
  src/softargmax.c
  src/tanh.c
  src/operator-delete.c
)

# Sources that run operators (the *-run files plus indirection setup).
set(PYTORCH_QNNPACK_EXEC_SRCS
  src/conv-run.cc
  src/deconv-run.cc
  src/fc-run.cc
  src/fc-dynamic-run.cc
  src/indirection.c
  src/operator-run.c
)

# ---[ Micro-kernel source lists, grouped by instruction set
# The scalar and psimd lists are compiled for every target; the remaining
# lists are added to the library only for a matching target processor.

# Scalar micro-kernels.
set(PYTORCH_QNNPACK_SCALAR_UKERNELS
  src/u8lut32norm/scalar.c
  src/x8lut/scalar.c
)

# psimd micro-kernels.
set(PYTORCH_QNNPACK_PSIMD_UKERNELS
  src/sgemm/6x8-psimd.c
)

# NEON micro-kernels written in C; used by both 32-bit and 64-bit ARM builds.
set(PYTORCH_QNNPACK_ARM_NEON_UKERNELS
  src/q8avgpool/mp8x9p8q-neon.c
  src/q8avgpool/up8x9-neon.c
  src/q8avgpool/up8xm-neon.c
  src/q8conv/4x8-neon.c
  src/q8conv/8x8-neon.c
  src/q8dwconv/mp8x25-neon.c
  src/q8dwconv/mp8x25-neon-per-channel.c
  src/q8dwconv/up8x9-neon.c
  src/q8dwconv/up8x9-neon-per-channel.c
  src/q8gavgpool/mp8x7p7q-neon.c
  src/q8gavgpool/up8x7-neon.c
  src/q8gavgpool/up8xm-neon.c
  src/q8gemm/4x-sumrows-neon.c
  src/q8gemm/4x8-neon.c
  src/q8gemm/4x8-dq-neon.c
  src/q8gemm/4x8c2-xzp-neon.c
  src/q8gemm/6x4-neon.c
  src/q8gemm/8x8-neon.c
  src/q8vadd/neon.c
  src/sgemm/5x8-neon.c
  src/sgemm/6x8-neon.c
  src/u8clamp/neon.c
  src/u8maxpool/16x9p8q-neon.c
  src/u8maxpool/sub16-neon.c
  src/u8rmax/neon.c
  src/x8zip/x2-neon.c
  src/x8zip/x3-neon.c
  src/x8zip/x4-neon.c
  src/x8zip/xm-neon.c
)

# Assembly micro-kernels for 32-bit ARM.
set(PYTORCH_QNNPACK_AARCH32_ASM_UKERNELS
  src/hgemm/8x8-aarch32-neonfp16arith.S
  src/q8conv/4x8-aarch32-neon.S
  src/q8dwconv/up8x9-aarch32-neon.S
  src/q8dwconv/up8x9-aarch32-neon-per-channel.S
  src/q8gemm/4x8-aarch32-neon.S
  src/q8gemm/4x8-dq-aarch32-neon.S
  src/q8gemm/4x8c2-xzp-aarch32-neon.S
)

# Assembly micro-kernels for 64-bit ARM.
set(PYTORCH_QNNPACK_AARCH64_ASM_UKERNELS
  src/q8conv/8x8-aarch64-neon.S
  src/q8gemm/8x8-aarch64-neon.S
  src/q8gemm/8x8-dq-aarch64-neon.S
)

# SSE2 micro-kernels for x86 / x86-64.
set(PYTORCH_QNNPACK_X86_SSE2_UKERNELS
  src/q8avgpool/mp8x9p8q-sse2.c
  src/q8avgpool/up8x9-sse2.c
  src/q8avgpool/up8xm-sse2.c
  src/q8conv/4x4c2-sse2.c
  src/q8dwconv/mp8x25-sse2.c
  src/q8dwconv/mp8x25-sse2-per-channel.c
  src/q8dwconv/up8x9-sse2.c
  src/q8dwconv/up8x9-sse2-per-channel.c
  src/q8gavgpool/mp8x7p7q-sse2.c
  src/q8gavgpool/up8x7-sse2.c
  src/q8gavgpool/up8xm-sse2.c
  src/q8gemm/2x4c8-sse2.c
  src/q8gemm/4x4c2-dq-sse2.c
  src/q8gemm/4x4c2-sse2.c
  src/q8vadd/sse2.c
  src/u8clamp/sse2.c
  src/u8maxpool/16x9p8q-sse2.c
  src/u8maxpool/sub16-sse2.c
  src/u8rmax/sse2.c
  src/x8zip/x2-sse2.c
  src/x8zip/x3-sse2.c
  src/x8zip/x4-sse2.c
  src/x8zip/xm-sse2.c
)

# Assemble the micro-kernel list for the target processor. Scalar and psimd
# kernels are always included; each architecture test below is independent and
# appends its own kernels when it matches.
set(PYTORCH_QNNPACK_UKERNELS
  ${PYTORCH_QNNPACK_SCALAR_UKERNELS}
  ${PYTORCH_QNNPACK_PSIMD_UKERNELS})

# 32-bit ARM: NEON C kernels plus AArch32 assembly.
if(CMAKE_SYSTEM_PROCESSOR MATCHES "^armv[5-8]" OR IOS_ARCH MATCHES "^armv7")
  list(APPEND PYTORCH_QNNPACK_UKERNELS
    ${PYTORCH_QNNPACK_ARM_NEON_UKERNELS}
    ${PYTORCH_QNNPACK_AARCH32_ASM_UKERNELS})
endif()

# 64-bit ARM: NEON C kernels plus AArch64 assembly.
if(CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" OR IOS_ARCH MATCHES "^arm64.*")
  list(APPEND PYTORCH_QNNPACK_UKERNELS
    ${PYTORCH_QNNPACK_ARM_NEON_UKERNELS}
    ${PYTORCH_QNNPACK_AARCH64_ASM_UKERNELS})
endif()

# x86 / x86-64: SSE2 kernels.
if(CMAKE_SYSTEM_PROCESSOR MATCHES "^(i[3-6]86|x86_64)$" OR IOS_ARCH MATCHES "^(i386|x86_64)$")
  list(APPEND PYTORCH_QNNPACK_UKERNELS
    ${PYTORCH_QNNPACK_X86_SSE2_UKERNELS})
endif()

# ---[ Library target
# Map PYTORCH_QNNPACK_LIBRARY_TYPE onto the add_library() type keyword.
# "default" passes no keyword, which leaves the choice to CMake.
if(PYTORCH_QNNPACK_LIBRARY_TYPE STREQUAL "default")
  set(pytorch_qnnpack_library_kind "")
elseif(PYTORCH_QNNPACK_LIBRARY_TYPE STREQUAL "shared")
  set(pytorch_qnnpack_library_kind SHARED)
elseif(PYTORCH_QNNPACK_LIBRARY_TYPE STREQUAL "static")
  set(pytorch_qnnpack_library_kind STATIC)
else()
  message(FATAL_ERROR "Unsupported QNNPACK library type \"${PYTORCH_QNNPACK_LIBRARY_TYPE}\". Must be \"static\", \"shared\", or \"default\"")
endif()

add_library(pytorch_qnnpack ${pytorch_qnnpack_library_kind}
  ${PYTORCH_QNNPACK_INIT_SRCS}
  ${PYTORCH_QNNPACK_EXEC_SRCS}
  ${PYTORCH_QNNPACK_UKERNELS})
unset(pytorch_qnnpack_library_kind)

# Language standards: C++14, and C11 with compiler extensions enabled.
set_property(TARGET pytorch_qnnpack PROPERTY CXX_STANDARD 14)
set_property(TARGET pytorch_qnnpack PROPERTY C_STANDARD 11)
set_property(TARGET pytorch_qnnpack PROPERTY C_EXTENSIONS YES)
# ---[ Per-source compile flags
# Flags are appended to each source file's COMPILE_FLAGS rather than set on
# the target, so every group of sources gets its own optimization level and
# instruction-set switches.

# 32-bit ARM: NEON C kernels are built with -marm -mfpu=neon. On iOS the
# AArch32 assembly files additionally get an explicit -arch.
if(CMAKE_SYSTEM_PROCESSOR MATCHES "^armv[5-8]" OR IOS_ARCH MATCHES "^armv7")
  set_property(SOURCE ${PYTORCH_QNNPACK_ARM_NEON_UKERNELS} APPEND_STRING PROPERTY COMPILE_FLAGS " -O2 -marm -mfpu=neon ")
  if(IOS)
    set_property(SOURCE ${PYTORCH_QNNPACK_AARCH32_ASM_UKERNELS} APPEND_STRING PROPERTY COMPILE_FLAGS " -arch ${IOS_ARCH} ")
  endif()
endif()

# 64-bit ARM: NEON C kernels only need -O2. On iOS the AArch64 assembly files
# additionally get an explicit -arch.
if(CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" OR IOS_ARCH MATCHES "^arm64.*")
  set_property(SOURCE ${PYTORCH_QNNPACK_ARM_NEON_UKERNELS} APPEND_STRING PROPERTY COMPILE_FLAGS " -O2 ")
  if(IOS)
    set_property(SOURCE ${PYTORCH_QNNPACK_AARCH64_ASM_UKERNELS} APPEND_STRING PROPERTY COMPILE_FLAGS " -arch ${IOS_ARCH} ")
  endif()
endif()

# x86 / x86-64: SSE2 kernels are built with -msse2.
if(CMAKE_SYSTEM_PROCESSOR MATCHES "^(i[3-6]86|x86_64)$" OR IOS_ARCH MATCHES "^(i386|x86_64)$")
  set_property(SOURCE ${PYTORCH_QNNPACK_X86_SSE2_UKERNELS} APPEND_STRING PROPERTY COMPILE_FLAGS " -O2 -msse2 ")
endif()

# psimd and scalar kernels are compiled for every target; on 32-bit ARM they
# receive the same -marm (and, for psimd, -mfpu=neon) switches as above.
if(CMAKE_SYSTEM_PROCESSOR MATCHES "^armv[5-8]" OR IOS_ARCH MATCHES "^armv7")
  set_property(SOURCE ${PYTORCH_QNNPACK_PSIMD_UKERNELS} APPEND_STRING PROPERTY COMPILE_FLAGS " -O2 -marm -mfpu=neon ")
  set_property(SOURCE ${PYTORCH_QNNPACK_SCALAR_UKERNELS} APPEND_STRING PROPERTY COMPILE_FLAGS " -O2 -marm ")
else()
  set_property(SOURCE ${PYTORCH_QNNPACK_PSIMD_UKERNELS} APPEND_STRING PROPERTY COMPILE_FLAGS " -O2 ")
  set_property(SOURCE ${PYTORCH_QNNPACK_SCALAR_UKERNELS} APPEND_STRING PROPERTY COMPILE_FLAGS " -O2 ")
endif()

# Initialization / operator-setup sources are optimized for size.
set_property(SOURCE ${PYTORCH_QNNPACK_INIT_SRCS} APPEND_STRING PROPERTY COMPILE_FLAGS " -Os ")

# Operator execution sources are optimized for speed outside Debug builds.
# The list applied here must be PYTORCH_QNNPACK_EXEC_SRCS, the execution source
# list this file defines; expanding a list name that is not defined makes
# set_property() a silent no-op and leaves these files without -O2.
# CMAKE_BUILD_TYPE is evaluated at configure time.
if(NOT CMAKE_BUILD_TYPE STREQUAL "Debug")
  set_property(SOURCE ${PYTORCH_QNNPACK_EXEC_SRCS} APPEND_STRING PROPERTY COMPILE_FLAGS " -O2 ")
endif()
# Both the public headers (include/) and the internal headers (src/) are
# exposed to targets that link against pytorch_qnnpack.
target_include_directories(pytorch_qnnpack PUBLIC include src)

# PUBLIC_HEADER is one list-valued property: setting it again replaces the
# previous value, so all installed headers have to be given in a single list.
set_target_properties(pytorch_qnnpack PROPERTIES
  PUBLIC_HEADER "include/pytorch_qnnpack.h;include/conv_utils.h;include/qnnpack_func.h")

# ---[ Configure clog
# Build clog from CLOG_SOURCE_DIR unless some other part of the build already
# defined a clog target.
if(NOT TARGET clog)
  set(CLOG_RUNTIME_TYPE "${CPUINFO_RUNTIME_TYPE}" CACHE STRING "")
  set(CLOG_BUILD_TESTS OFF CACHE BOOL "")
  add_subdirectory("${CLOG_SOURCE_DIR}" "${CONFU_DEPENDENCIES_BINARY_DIR}/clog")
  # clog is built as a static library, but a dynamic library may indirectly
  # depend on it, so it has to be position independent.
  set_target_properties(clog PROPERTIES POSITION_INDEPENDENT_CODE ON)
endif()
target_link_libraries(pytorch_qnnpack PUBLIC clog)

# ---[ Configure cpuinfo
# Nothing is configured when a cpuinfo target already exists. Otherwise either
# import the system library (USE_SYSTEM_CPUINFO) or build the sources found in
# CPUINFO_SOURCE_DIR with its tools, tests and benchmarks switched off.
if(NOT TARGET cpuinfo AND USE_SYSTEM_CPUINFO)
  add_library(cpuinfo SHARED IMPORTED)
  find_library(CPUINFO_LIBRARY cpuinfo)
  if(NOT CPUINFO_LIBRARY)
    message(FATAL_ERROR "Cannot find cpuinfo")
  endif()
  # STATUS keeps this with the rest of the configure log instead of emitting
  # it as a mode-less notice.
  message(STATUS "Found cpuinfo: ${CPUINFO_LIBRARY}")
  set_target_properties(cpuinfo PROPERTIES IMPORTED_LOCATION "${CPUINFO_LIBRARY}")
elseif(NOT TARGET cpuinfo)
  set(CPUINFO_BUILD_TOOLS OFF CACHE BOOL "")
  set(CPUINFO_BUILD_UNIT_TESTS OFF CACHE BOOL "")
  set(CPUINFO_BUILD_MOCK_TESTS OFF CACHE BOOL "")
  set(CPUINFO_BUILD_BENCHMARKS OFF CACHE BOOL "")
  add_subdirectory(
    "${CPUINFO_SOURCE_DIR}"
    "${CONFU_DEPENDENCIES_BINARY_DIR}/cpuinfo")
endif()
target_link_libraries(pytorch_qnnpack PRIVATE cpuinfo)

# ---[ Configure pthreadpool
# Nothing is configured when a pthreadpool target already exists. Otherwise
# either build the sources found in PTHREADPOOL_SOURCE_DIR (tests and
# benchmarks off) or import the system library (USE_SYSTEM_PTHREADPOOL).
if(NOT TARGET pthreadpool AND NOT USE_SYSTEM_PTHREADPOOL)
  set(PTHREADPOOL_BUILD_TESTS OFF CACHE BOOL "")
  set(PTHREADPOOL_BUILD_BENCHMARKS OFF CACHE BOOL "")
  add_subdirectory(
    "${PTHREADPOOL_SOURCE_DIR}"
    "${CONFU_DEPENDENCIES_BINARY_DIR}/pthreadpool")
elseif(NOT TARGET pthreadpool AND USE_SYSTEM_PTHREADPOOL)
  add_library(pthreadpool SHARED IMPORTED)
  find_library(PTHREADPOOL_LIBRARY pthreadpool)
  if(NOT PTHREADPOOL_LIBRARY)
    message(FATAL_ERROR "Cannot find pthreadpool")
  endif()
  # STATUS supplies the "-- " prefix itself, so it is not spelled out by hand.
  message(STATUS "Found pthreadpool: ${PTHREADPOOL_LIBRARY}")
  set_target_properties(pthreadpool PROPERTIES
    IMPORTED_LOCATION "${PTHREADPOOL_LIBRARY}")
  # Empty interface target so the custom-threadpool branch below has a
  # pthreadpool_interface target to link in the system-library case.
  add_library(pthreadpool_interface INTERFACE)
endif()
if(PYTORCH_QNNPACK_CUSTOM_THREADPOOL)
  # Depend on pthreadpool interface, but not on implementation.
  # This is used when QNNPACK user (e.g. Caffe2) provides its own threadpool implementation.
  target_link_libraries(pytorch_qnnpack PUBLIC pthreadpool_interface)
else()
  target_link_libraries(pytorch_qnnpack PUBLIC pthreadpool)
endif()

# ---[ Configure FXdiv
# Skipped when an fxdiv target already exists. With USE_SYSTEM_FXDIV the
# installed fxdiv.h is located and wrapped in a target of the same name;
# otherwise FXdiv is built from FXDIV_SOURCE_DIR with tests and benchmarks off.
if(NOT TARGET fxdiv)
  if(USE_SYSTEM_FXDIV)
    find_file(FXDIV_HDR fxdiv.h PATH_SUFFIXES include)
    if(NOT FXDIV_HDR)
      message(FATAL_ERROR "Cannot find fxdiv")
    endif()
    add_library(fxdiv STATIC "${FXDIV_HDR}")
    # The only source is a header, so the linker language is set explicitly.
    set_target_properties(fxdiv PROPERTIES LINKER_LANGUAGE C)
  else()
    set(FXDIV_BUILD_TESTS OFF CACHE BOOL "")
    set(FXDIV_BUILD_BENCHMARKS OFF CACHE BOOL "")
    add_subdirectory("${FXDIV_SOURCE_DIR}" "${CONFU_DEPENDENCIES_BINARY_DIR}/fxdiv")
  endif()
endif()
target_link_libraries(pytorch_qnnpack PRIVATE fxdiv)

# ---[ Configure psimd
# Skipped when a psimd target already exists. With USE_SYSTEM_PSIMD the
# installed psimd.h is located and wrapped in a target of the same name;
# otherwise psimd is built from PSIMD_SOURCE_DIR.
if(NOT TARGET psimd)
  if(USE_SYSTEM_PSIMD)
    find_file(PSIMD_HDR psimd.h PATH_SUFFIXES include)
    if(NOT PSIMD_HDR)
      message(FATAL_ERROR "Cannot find psimd")
    endif()
    add_library(psimd STATIC "${PSIMD_HDR}")
    # The only source is a header, so the linker language is set explicitly.
    set_target_properties(psimd PROPERTIES LINKER_LANGUAGE C)
  else()
    add_subdirectory("${PSIMD_SOURCE_DIR}" "${CONFU_DEPENDENCIES_BINARY_DIR}/psimd")
  endif()
endif()
target_link_libraries(pytorch_qnnpack PRIVATE psimd)

# ---[ Configure FP16
# Skipped when an fp16 target already exists. With USE_SYSTEM_FP16 the
# installed fp16.h is located and wrapped in a target of the same name;
# otherwise FP16 is built from FP16_SOURCE_DIR with tests and benchmarks off.
if(NOT TARGET fp16)
  if(USE_SYSTEM_FP16)
    find_file(FP16_HDR fp16.h PATH_SUFFIXES include)
    if(NOT FP16_HDR)
      message(FATAL_ERROR "Cannot find fp16")
    endif()
    add_library(fp16 STATIC "${FP16_HDR}")
    # The only source is a header, so the linker language is set explicitly.
    set_property(TARGET fp16 PROPERTY LINKER_LANGUAGE C)
  else()
    set(FP16_BUILD_TESTS OFF CACHE BOOL "")
    set(FP16_BUILD_BENCHMARKS OFF CACHE BOOL "")
    add_subdirectory("${FP16_SOURCE_DIR}" "${CONFU_DEPENDENCIES_BINARY_DIR}/fp16")
  endif()
endif()
target_link_libraries(pytorch_qnnpack PRIVATE fp16)

# ---[ Install rules
# Static archives and shared libraries both go to the GNUInstallDirs library
# directory; the target's PUBLIC_HEADER files go to the include directory.
install(
  TARGETS pytorch_qnnpack
  ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
  LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
  PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})

# ---[ QNNPACK unit tests
if(PYTORCH_QNNPACK_BUILD_TESTS)
  # ---[ Build google test
  # Skipped when a gtest target already exists in the build.
  if(NOT TARGET gtest)
    set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
    add_subdirectory(
      "${GOOGLETEST_SOURCE_DIR}"
      "${CONFU_DEPENDENCIES_BINARY_DIR}/googletest")
  endif()

  # pytorch_qnnpack_add_unit_test(<name> [<extra_lib>...])
  #
  # Builds test/<name>.cc into the executable target <name>-test and registers
  # it with CTest under the same name.
  #   <name>       test source base name (without "test/" and ".cc")
  #   <extra_lib>  libraries linked between pytorch_qnnpack and the Google
  #                Test libraries, in the order given
  #
  # Every test is compiled as strict C++14 (no compiler extensions) with the
  # src/ and test/ directories on its include path. The test is registered with
  # the add_test(NAME ... COMMAND ...) signature so that CTest runs the built
  # target itself; the short add_test(<name> <command>) form does not resolve
  # target names and depends on the executable being found by plain file name.
  function(pytorch_qnnpack_add_unit_test test_name)
    set(test_target "${test_name}-test")
    add_executable(${test_target} "test/${test_name}.cc")
    set_target_properties(${test_target} PROPERTIES
      CXX_STANDARD 14
      CXX_STANDARD_REQUIRED YES
      CXX_EXTENSIONS NO)
    target_include_directories(${test_target} PRIVATE src test)
    target_link_libraries(${test_target} PRIVATE pytorch_qnnpack ${ARGN} gtest gtest_main)
    add_test(NAME ${test_target} COMMAND ${test_target})
  endfunction()

  # ---[ Build unit tests for high-level functionality
  # The first three operator tests need fp16 (and two of them clog) in
  # addition to cpuinfo.
  pytorch_qnnpack_add_unit_test(convolution clog cpuinfo fp16)
  pytorch_qnnpack_add_unit_test(deconvolution cpuinfo fp16)
  pytorch_qnnpack_add_unit_test(fully-connected clog cpuinfo fp16)

  # The remaining operator tests only need cpuinfo.
  foreach(operator_test
      channel-shuffle
      add
      leaky-relu
      sigmoid
      clamp
      softargmax
      tanh
      hardsigmoid
      hardswish
      max-pooling
      average-pooling
      global-average-pooling)
    pytorch_qnnpack_add_unit_test(${operator_test} cpuinfo)
  endforeach()

  # ---[ Build unit tests for micro-kernels
  # All micro-kernel tests link cpuinfo and fp16.
  foreach(ukernel_test
      q8gemm
      q8conv
      q8dwconv
      q8vadd
      q8avgpool
      q8gavgpool
      u8maxpool
      u8clamp
      u8rmax
      u8lut32norm
      x8lut
      x8zip
      hgemm
      sgemm)
    pytorch_qnnpack_add_unit_test(${ukernel_test} cpuinfo fp16)
  endforeach()
endif()

# ---[ QNNPACK micro-benchmarks
if(PYTORCH_QNNPACK_BUILD_BENCHMARKS)
  # ---[ Build google benchmark
  # Skipped when a benchmark target already exists in the build.
  if(NOT TARGET benchmark)
    set(BENCHMARK_ENABLE_TESTING OFF CACHE BOOL "")
    add_subdirectory(
      "${GOOGLEBENCHMARK_SOURCE_DIR}"
      "${CONFU_DEPENDENCIES_BINARY_DIR}/googlebenchmark")
  endif()

  # ---[ Operator benchmarks
  # bench/<name>.cc becomes <name>-bench, compiled as strict C++14 and linked
  # against the library and Google Benchmark only.
  foreach(operator_bench
      add
      average-pooling
      channel-shuffle
      convolution
      global-average-pooling
      max-pooling
      sigmoid
      softargmax
      tanh
      hardsigmoid
      hardswish)
    add_executable(${operator_bench}-bench bench/${operator_bench}.cc)
    set_target_properties(${operator_bench}-bench PROPERTIES
      CXX_STANDARD 14
      CXX_STANDARD_REQUIRED YES
      CXX_EXTENSIONS NO)
    target_link_libraries(${operator_bench}-bench PRIVATE pytorch_qnnpack benchmark)
  endforeach()

  # ---[ Micro-kernel benchmarks
  # Same build settings as above, plus src/ on the include path. The link
  # lines differ per benchmark and are spelled out after the loop.
  foreach(ukernel_bench q8gemm hgemm sgemm)
    add_executable(${ukernel_bench}-bench bench/${ukernel_bench}.cc)
    set_target_properties(${ukernel_bench}-bench PROPERTIES
      CXX_STANDARD 14
      CXX_STANDARD_REQUIRED YES
      CXX_EXTENSIONS NO)
    target_include_directories(${ukernel_bench}-bench PRIVATE src)
  endforeach()

  # NOTE(review): the doubled "pytorch_PYTORCH_" prefix looks like a leftover
  # from a mechanical rename; confirm which macro bench/q8gemm.cc actually
  # tests before changing the name.
  target_compile_definitions(q8gemm-bench PRIVATE pytorch_PYTORCH_QNNPACK_BENCHMARK_GEMMLOWP=0)
  target_link_libraries(q8gemm-bench PRIVATE pytorch_qnnpack cpuinfo fp16 benchmark)
  target_link_libraries(hgemm-bench PRIVATE pytorch_qnnpack cpuinfo fp16 benchmark)
  target_link_libraries(sgemm-bench PRIVATE pytorch_qnnpack cpuinfo benchmark)
endif()
